package edu.unc.ils.memai.tokenize;

import java.util.ArrayList;
import java.util.List;

/**
 * Splits raw text into phrase segments.
 *
 * <p>A segment is a run of words separated in the source text by whitespace,
 * a hyphen or a slash. Any other character following a word (for example a
 * period or a comma) closes the current segment. Tokens made only of digits
 * are never emitted; when such a token is followed by whitespace or another
 * delimiter it closes the segment that was being built.
 */
public class MauiTokenizer implements Tokenizer
{
    /**
     * Tokenizes {@code text} into a list of phrase segments.
     *
     * <p>Within a segment, words are joined by a single space, except that a
     * word which was followed by a dangling {@code '/'} (a slash not directly
     * followed by a letter or digit) is joined to the next word with
     * {@code '/'}. A single newline after a word does not end the segment; a
     * second newline does.
     *
     * <p>Example: {@code tokenize("Hello world. Foo", false)} yields
     * {@code ["Hello world", "Foo"]}.
     *
     * @param text the text to tokenize; {@code null} yields an empty list
     * @param disallowInternalPeriods if {@code true}, a period is never kept
     *        inside a token (so {@code "e.g"} is split at the period); if
     *        {@code false}, a period between two letters/digits stays part
     *        of the token
     * @return the segments in order of appearance; never {@code null}
     */
    public List<String> tokenize(String text, boolean disallowInternalPeriods)
    {
        List<String> segments = new ArrayList<String>();
        if (text == null) {
            return segments;
        }

        // Phrase currently being assembled; empty means "at the start of a phrase".
        StringBuilder phrase = new StringBuilder();
        final int length = text.length();
        int j = 0;
        boolean seenNewLine = false;
        boolean haveSeenSlash = false;

        while (j < length) {
            boolean isWord = false;      // token contains at least one letter
            boolean potNumber = false;   // token contains at least one letter or digit
            int startj = j;

            // Scan one token: letters/digits plus punctuation that is
            // directly surrounded by letters/digits on both sides.
            while (j < length) {
                char ch = text.charAt(j);
                if (Character.isLetterOrDigit(ch)) {
                    potNumber = true;
                    if (Character.isLetter(ch)) {
                        isWord = true;
                    }
                    j++;
                } else if (isInternalPunctuation(ch, disallowInternalPeriods)
                        && (j > 0) && (j + 1 < length)
                        && Character.isLetterOrDigit(text.charAt(j - 1))
                        && Character.isLetterOrDigit(text.charAt(j + 1))) {
                    j++;
                } else {
                    break;
                }
            }

            if (isWord) {
                if (phrase.length() > 0) {
                    // A hyphen between words is rendered as a space, just
                    // like whitespace; only a dangling slash is preserved.
                    phrase.append(haveSeenSlash ? '/' : ' ');
                }
                phrase.append(text, startj, j);
                if (j == length) {
                    break;
                }
                seenNewLine = false;
                haveSeenSlash = false;

                char delimiter = text.charAt(j);
                if (Character.isWhitespace(delimiter)) {
                    seenNewLine = (delimiter == '\n');
                } else if (delimiter == '-') {
                    // Hyphen keeps the phrase open; the next word is joined with a space.
                } else if (delimiter == '/') {
                    haveSeenSlash = true;
                } else {
                    // Any other character ends the phrase.
                    flushPhrase(phrase, segments);
                }
                j++;
            } else if (j == length) {
                break;
            } else {
                char delimiter = text.charAt(j);
                if (delimiter == '\n') {
                    // A second consecutive newline, or a newline right after
                    // a digits-only token, ends the phrase.
                    if (seenNewLine || potNumber) {
                        flushPhrase(phrase, segments);
                    }
                    seenNewLine = true;
                } else if (Character.isWhitespace(delimiter)) {
                    // A digits-only token followed by whitespace ends the phrase.
                    if (potNumber) {
                        flushPhrase(phrase, segments);
                    }
                } else {
                    flushPhrase(phrase, segments);
                }
                j++;
            }
        }

        // Emit the phrase still being built when the text ran out; without
        // this the last phrase of any text not ending in punctuation is lost.
        flushPhrase(phrase, segments);

        return segments;
    }

    /**
     * Tells whether {@code ch} may appear inside a token when it is directly
     * surrounded by letters or digits.
     */
    private static boolean isInternalPunctuation(char ch, boolean disallowInternalPeriods)
    {
        if (ch == '.') {
            return !disallowInternalPeriods;
        }
        return (ch == '@') || (ch == '_') || (ch == '&')
                || (ch == '/') || (ch == '\'');
    }

    /** Appends the pending phrase, if any, to {@code segments} and clears the buffer. */
    private static void flushPhrase(StringBuilder phrase, List<String> segments)
    {
        if (phrase.length() > 0) {
            segments.add(phrase.toString());
            phrase.setLength(0);
        }
    }
}
